1   package org.apache.lucene.analysis.util;
2   
3   /*
4    * Licensed to the Apache Software Foundation (ASF) under one or more
5    * contributor license agreements.  See the NOTICE file distributed with
6    * this work for additional information regarding copyright ownership.
7    * The ASF licenses this file to You under the Apache License, Version 2.0
8    * (the "License"); you may not use this file except in compliance with
9    * the License.  You may obtain a copy of the License at
10   *
11   *     http://www.apache.org/licenses/LICENSE-2.0
12   *
13   * Unless required by applicable law or agreed to in writing, software
14   * distributed under the License is distributed on an "AS IS" BASIS,
15   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
16   * See the License for the specific language governing permissions and
17   * limitations under the License.
18   */
19  
20  import java.util.Arrays;
21  import java.util.AbstractMap;
22  import java.util.AbstractSet;
23  import java.util.Iterator;
24  import java.util.Map;
25  import java.util.Set;
26  
27  import org.apache.lucene.analysis.util.CharacterUtils;
28  
29  /**
30   * A simple class that stores key Strings as char[]'s in a
31   * hash table. Note that this is not a general purpose
32   * class.  For example, it cannot remove items from the
33   * map, nor does it resize its hash table to be smaller,
34   * etc.  It is designed to be quick to retrieve items
35   * by char[] keys without the necessity of converting
36   * to a String first.
37   */
38  public class CharArrayMap<V> extends AbstractMap<Object,V> {
39    // private only because missing generics
40    private static final CharArrayMap<?> EMPTY_MAP = new EmptyCharArrayMap<>();
41  
42    private final static int INIT_SIZE = 8;
43    private final CharacterUtils charUtils;
44    private boolean ignoreCase;  
45    private int count;
46    char[][] keys; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
47    V[] values; // package private because used in CharArraySet's non Set-conform CharArraySetIterator
48  
49    /**
50     * Create map with enough capacity to hold startSize terms
51     *
52     * @param startSize
53     *          the initial capacity
54     * @param ignoreCase
55     *          <code>false</code> if and only if the set should be case sensitive
56     *          otherwise <code>true</code>.
57     */
58    @SuppressWarnings("unchecked")
59    public CharArrayMap(int startSize, boolean ignoreCase) {
60      this.ignoreCase = ignoreCase;
61      int size = INIT_SIZE;
62      while(startSize + (startSize>>2) > size)
63        size <<= 1;
64      keys = new char[size][];
65      values = (V[]) new Object[size];
66      this.charUtils = CharacterUtils.getInstance();
67    }
68  
69    /**
70     * Creates a map from the mappings in another map. 
71     *
72     * @param c
73     *          a map whose mappings to be copied
74     * @param ignoreCase
75     *          <code>false</code> if and only if the set should be case sensitive
76     *          otherwise <code>true</code>.
77     */
78    public CharArrayMap(Map<?,? extends V> c, boolean ignoreCase) {
79      this(c.size(), ignoreCase);
80      putAll(c);
81    }
82    
83    /** Create set from the supplied map (used internally for readonly maps...) */
84    private CharArrayMap(CharArrayMap<V> toCopy){
85      this.keys = toCopy.keys;
86      this.values = toCopy.values;
87      this.ignoreCase = toCopy.ignoreCase;
88      this.count = toCopy.count;
89      this.charUtils = toCopy.charUtils;
90    }
91    
92    /** Clears all entries in this map. This method is supported for reusing, but not {@link Map#remove}. */
93    @Override
94    public void clear() {
95      count = 0;
96      Arrays.fill(keys, null);
97      Arrays.fill(values, null);
98    }
99  
100   /** true if the <code>len</code> chars of <code>text</code> starting at <code>off</code>
101    * are in the {@link #keySet()} */
102   public boolean containsKey(char[] text, int off, int len) {
103     return keys[getSlot(text, off, len)] != null;
104   }
105 
106   /** true if the <code>CharSequence</code> is in the {@link #keySet()} */
107   public boolean containsKey(CharSequence cs) {
108     return keys[getSlot(cs)] != null;
109   }
110 
111   @Override
112   public boolean containsKey(Object o) {
113     if (o instanceof char[]) {
114       final char[] text = (char[])o;
115       return containsKey(text, 0, text.length);
116     } 
117     return containsKey(o.toString());
118   }
119 
120   /** returns the value of the mapping of <code>len</code> chars of <code>text</code>
121    * starting at <code>off</code> */
122   public V get(char[] text, int off, int len) {
123     return values[getSlot(text, off, len)];
124   }
125 
126   /** returns the value of the mapping of the chars inside this {@code CharSequence} */
127   public V get(CharSequence cs) {
128     return values[getSlot(cs)];
129   }
130 
131   @Override
132   public V get(Object o) {
133     if (o instanceof char[]) {
134       final char[] text = (char[])o;
135       return get(text, 0, text.length);
136     } 
137     return get(o.toString());
138   }
139 
140   private int getSlot(char[] text, int off, int len) {
141     int code = getHashCode(text, off, len);
142     int pos = code & (keys.length-1);
143     char[] text2 = keys[pos];
144     if (text2 != null && !equals(text, off, len, text2)) {
145       final int inc = ((code>>8)+code)|1;
146       do {
147         code += inc;
148         pos = code & (keys.length-1);
149         text2 = keys[pos];
150       } while (text2 != null && !equals(text, off, len, text2));
151     }
152     return pos;
153   }
154 
155   /** Returns true if the String is in the set */  
156   private int getSlot(CharSequence text) {
157     int code = getHashCode(text);
158     int pos = code & (keys.length-1);
159     char[] text2 = keys[pos];
160     if (text2 != null && !equals(text, text2)) {
161       final int inc = ((code>>8)+code)|1;
162       do {
163         code += inc;
164         pos = code & (keys.length-1);
165         text2 = keys[pos];
166       } while (text2 != null && !equals(text, text2));
167     }
168     return pos;
169   }
170 
171   /** Add the given mapping. */
172   public V put(CharSequence text, V value) {
173     return put(text.toString(), value); // could be more efficient
174   }
175 
176   @Override
177   public V put(Object o, V value) {
178     if (o instanceof char[]) {
179       return put((char[])o, value);
180     }
181     return put(o.toString(), value);
182   }
183   
184   /** Add the given mapping. */
185   public V put(String text, V value) {
186     return put(text.toCharArray(), value);
187   }
188 
189   /** Add the given mapping.
190    * If ignoreCase is true for this Set, the text array will be directly modified.
191    * The user should never modify this text array after calling this method.
192    */
193   public V put(char[] text, V value) {
194     if (ignoreCase) {
195       charUtils.toLowerCase(text, 0, text.length);
196     }
197     int slot = getSlot(text, 0, text.length);
198     if (keys[slot] != null) {
199       final V oldValue = values[slot];
200       values[slot] = value;
201       return oldValue;
202     }
203     keys[slot] = text;
204     values[slot] = value;
205     count++;
206 
207     if (count + (count>>2) > keys.length) {
208       rehash();
209     }
210 
211     return null;
212   }
213 
214   @SuppressWarnings("unchecked")
215   private void rehash() {
216     assert keys.length == values.length;
217     final int newSize = 2*keys.length;
218     final char[][] oldkeys = keys;
219     final V[] oldvalues = values;
220     keys = new char[newSize][];
221     values = (V[]) new Object[newSize];
222 
223     for(int i=0; i<oldkeys.length; i++) {
224       char[] text = oldkeys[i];
225       if (text != null) {
226         // todo: could be faster... no need to compare strings on collision
227         final int slot = getSlot(text,0,text.length);
228         keys[slot] = text;
229         values[slot] = oldvalues[i];
230       }
231     }
232   }
233   
234   private boolean equals(char[] text1, int off, int len, char[] text2) {
235     if (len != text2.length)
236       return false;
237     final int limit = off+len;
238     if (ignoreCase) {
239       for(int i=0;i<len;) {
240         final int codePointAt = charUtils.codePointAt(text1, off+i, limit);
241         if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
242           return false;
243         i += Character.charCount(codePointAt); 
244       }
245     } else {
246       for(int i=0;i<len;i++) {
247         if (text1[off+i] != text2[i])
248           return false;
249       }
250     }
251     return true;
252   }
253 
254   private boolean equals(CharSequence text1, char[] text2) {
255     int len = text1.length();
256     if (len != text2.length)
257       return false;
258     if (ignoreCase) {
259       for(int i=0;i<len;) {
260         final int codePointAt = charUtils.codePointAt(text1, i);
261         if (Character.toLowerCase(codePointAt) != charUtils.codePointAt(text2, i, text2.length))
262           return false;
263         i += Character.charCount(codePointAt);
264       }
265     } else {
266       for(int i=0;i<len;i++) {
267         if (text1.charAt(i) != text2[i])
268           return false;
269       }
270     }
271     return true;
272   }
273   
274   private int getHashCode(char[] text, int offset, int len) {
275     if (text == null)
276       throw new NullPointerException();
277     int code = 0;
278     final int stop = offset + len;
279     if (ignoreCase) {
280       for (int i=offset; i<stop;) {
281         final int codePointAt = charUtils.codePointAt(text, i, stop);
282         code = code*31 + Character.toLowerCase(codePointAt);
283         i += Character.charCount(codePointAt);
284       }
285     } else {
286       for (int i=offset; i<stop; i++) {
287         code = code*31 + text[i];
288       }
289     }
290     return code;
291   }
292 
293   private int getHashCode(CharSequence text) {
294     if (text == null)
295       throw new NullPointerException();
296     int code = 0;
297     int len = text.length();
298     if (ignoreCase) {
299       for (int i=0; i<len;) {
300         int codePointAt = charUtils.codePointAt(text, i);
301         code = code*31 + Character.toLowerCase(codePointAt);
302         i += Character.charCount(codePointAt);
303       }
304     } else {
305       for (int i=0; i<len; i++) {
306         code = code*31 + text.charAt(i);
307       }
308     }
309     return code;
310   }
311 
312   @Override
313   public V remove(Object key) {
314     throw new UnsupportedOperationException();
315   }
316 
317   @Override
318   public int size() {
319     return count;
320   }
321 
322   @Override
323   public String toString() {
324     final StringBuilder sb = new StringBuilder("{");
325     for (Map.Entry<Object,V> entry : entrySet()) {
326       if (sb.length()>1) sb.append(", ");
327       sb.append(entry);
328     }
329     return sb.append('}').toString();
330   }
331 
332   private EntrySet entrySet = null;
333   private CharArraySet keySet = null;
334   
335   EntrySet createEntrySet() {
336     return new EntrySet(true);
337   }
338   
339   @Override
340   public final EntrySet entrySet() {
341     if (entrySet == null) {
342       entrySet = createEntrySet();
343     }
344     return entrySet;
345   }
346   
347   // helper for CharArraySet to not produce endless recursion
348   final Set<Object> originalKeySet() {
349     return super.keySet();
350   }
351 
352   /** Returns an {@link CharArraySet} view on the map's keys.
353    * The set will use the same {@code matchVersion} as this map. */
354   @Override @SuppressWarnings({"unchecked","rawtypes"})
355   public final CharArraySet keySet() {
356     if (keySet == null) {
357       // prevent adding of entries
358       keySet = new CharArraySet((CharArrayMap) this) {
359         @Override
360         public boolean add(Object o) {
361           throw new UnsupportedOperationException();
362         }
363         @Override
364         public boolean add(CharSequence text) {
365           throw new UnsupportedOperationException();
366         }
367         @Override
368         public boolean add(String text) {
369           throw new UnsupportedOperationException();
370         }
371         @Override
372         public boolean add(char[] text) {
373           throw new UnsupportedOperationException();
374         }
375       };
376     }
377     return keySet;
378   }
379 
380   /** public iterator class so efficient methods are exposed to users */
381   public class EntryIterator implements Iterator<Map.Entry<Object,V>> {
382     private int pos=-1;
383     private int lastPos;
384     private final boolean allowModify;
385     
386     private EntryIterator(boolean allowModify) {
387       this.allowModify = allowModify;
388       goNext();
389     }
390 
391     private void goNext() {
392       lastPos = pos;
393       pos++;
394       while (pos < keys.length && keys[pos] == null) pos++;
395     }
396 
397     @Override
398     public boolean hasNext() {
399       return pos < keys.length;
400     }
401 
402     /** gets the next key... do not modify the returned char[] */
403     public char[] nextKey() {
404       goNext();
405       return keys[lastPos];
406     }
407 
408     /** gets the next key as a newly created String object */
409     public String nextKeyString() {
410       return new String(nextKey());
411     }
412 
413     /** returns the value associated with the last key returned */
414     public V currentValue() {
415       return values[lastPos];
416     }
417 
418     /** sets the value associated with the last key returned */    
419     public V setValue(V value) {
420       if (!allowModify)
421         throw new UnsupportedOperationException();
422       V old = values[lastPos];
423       values[lastPos] = value;
424       return old;      
425     }
426 
427     /** use nextCharArray() + currentValue() for better efficiency. */
428     @Override
429     public Map.Entry<Object,V> next() {
430       goNext();
431       return new MapEntry(lastPos, allowModify);
432     }
433 
434     @Override
435     public void remove() {
436       throw new UnsupportedOperationException();
437     }
438   }
439 
440   private final class MapEntry implements Map.Entry<Object,V> {
441     private final int pos;
442     private final boolean allowModify;
443 
444     private MapEntry(int pos, boolean allowModify) {
445       this.pos = pos;
446       this.allowModify = allowModify;
447     }
448 
449     @Override
450     public Object getKey() {
451       // we must clone here, as putAll to another CharArrayMap
452       // with other case sensitivity flag would corrupt the keys
453       return keys[pos].clone();
454     }
455 
456     @Override
457     public V getValue() {
458       return values[pos];
459     }
460 
461     @Override
462     public V setValue(V value) {
463       if (!allowModify)
464         throw new UnsupportedOperationException();
465       final V old = values[pos];
466       values[pos] = value;
467       return old;
468     }
469 
470     @Override
471     public String toString() {
472       return new StringBuilder().append(keys[pos]).append('=')
473         .append((values[pos] == CharArrayMap.this) ? "(this Map)" : values[pos])
474         .toString();
475     }
476   }
477 
478   /** public EntrySet class so efficient methods are exposed to users */
479   public final class EntrySet extends AbstractSet<Map.Entry<Object,V>> {
480     private final boolean allowModify;
481     
482     private EntrySet(boolean allowModify) {
483       this.allowModify = allowModify;
484     }
485   
486     @Override
487     public EntryIterator iterator() {
488       return new EntryIterator(allowModify);
489     }
490     
491     @Override
492     @SuppressWarnings("unchecked")
493     public boolean contains(Object o) {
494       if (!(o instanceof Map.Entry))
495         return false;
496       final Map.Entry<Object,V> e = (Map.Entry<Object,V>)o;
497       final Object key = e.getKey();
498       final Object val = e.getValue();
499       final Object v = get(key);
500       return v == null ? val == null : v.equals(val);
501     }
502     
503     @Override
504     public boolean remove(Object o) {
505       throw new UnsupportedOperationException();
506     }
507     
508     @Override
509     public int size() {
510       return count;
511     }
512     
513     @Override
514     public void clear() {
515       if (!allowModify)
516         throw new UnsupportedOperationException();
517       CharArrayMap.this.clear();
518     }
519   }
520   
521   /**
522    * Returns an unmodifiable {@link CharArrayMap}. This allows to provide
523    * unmodifiable views of internal map for "read-only" use.
524    * 
525    * @param map
526    *          a map for which the unmodifiable map is returned.
527    * @return an new unmodifiable {@link CharArrayMap}.
528    * @throws NullPointerException
529    *           if the given map is <code>null</code>.
530    */
531   public static <V> CharArrayMap<V> unmodifiableMap(CharArrayMap<V> map) {
532     if (map == null)
533       throw new NullPointerException("Given map is null");
534     if (map == emptyMap() || map.isEmpty())
535       return emptyMap();
536     if (map instanceof UnmodifiableCharArrayMap)
537       return map;
538     return new UnmodifiableCharArrayMap<>(map);
539   }
540 
541   /**
542    * Returns a copy of the given map as a {@link CharArrayMap}. If the given map
543    * is a {@link CharArrayMap} the ignoreCase property will be preserved.
544    * 
545    * @param map
546    *          a map to copy
547    * @return a copy of the given map as a {@link CharArrayMap}. If the given map
548    *         is a {@link CharArrayMap} the ignoreCase property as well as the
549    *         matchVersion will be of the given map will be preserved.
550    */
551   @SuppressWarnings("unchecked")
552   public static <V> CharArrayMap<V> copy(final Map<?,? extends V> map) {
553     if(map == EMPTY_MAP)
554       return emptyMap();
555     if(map instanceof CharArrayMap) {
556       CharArrayMap<V> m = (CharArrayMap<V>) map;
557       // use fast path instead of iterating all values
558       // this is even on very small sets ~10 times faster than iterating
559       final char[][] keys = new char[m.keys.length][];
560       System.arraycopy(m.keys, 0, keys, 0, keys.length);
561       final V[] values = (V[]) new Object[m.values.length];
562       System.arraycopy(m.values, 0, values, 0, values.length);
563       m = new CharArrayMap<>(m);
564       m.keys = keys;
565       m.values = values;
566       return m;
567     }
568     // In jdk-9b54 or later, a plain diamond causes compile error with "-source 1.7":
569     return new CharArrayMap<V>(map, false);
570   }
571   
572   /** Returns an empty, unmodifiable map. */
573   @SuppressWarnings("unchecked")
574   public static <V> CharArrayMap<V> emptyMap() {
575     return (CharArrayMap<V>) EMPTY_MAP;
576   }
577   
578   // package private CharArraySet instanceof check in CharArraySet
579   static class UnmodifiableCharArrayMap<V> extends CharArrayMap<V> {
580 
581     UnmodifiableCharArrayMap(CharArrayMap<V> map) {
582       super(map);
583     }
584 
585     @Override
586     public void clear() {
587       throw new UnsupportedOperationException();
588     }
589 
590     @Override
591     public V put(Object o, V val){
592       throw new UnsupportedOperationException();
593     }
594     
595     @Override
596     public V put(char[] text, V val) {
597       throw new UnsupportedOperationException();
598     }
599 
600     @Override
601     public V put(CharSequence text, V val) {
602       throw new UnsupportedOperationException();
603     }
604 
605     @Override
606     public V put(String text, V val) {
607       throw new UnsupportedOperationException();
608     }
609     
610     @Override
611     public V remove(Object key) {
612       throw new UnsupportedOperationException();
613     }
614   
615     @Override
616     EntrySet createEntrySet() {
617       return new EntrySet(false);
618     }
619   }
620   
621   /**
622    * Empty {@link org.apache.lucene.analysis.util.CharArrayMap.UnmodifiableCharArrayMap} optimized for speed.
623    * Contains checks will always return <code>false</code> or throw
624    * NPE if necessary.
625    */
626   private static final class EmptyCharArrayMap<V> extends UnmodifiableCharArrayMap<V> {
627     EmptyCharArrayMap() {
628       super(new CharArrayMap<V>(0, false));
629     }
630     
631     @Override
632     public boolean containsKey(char[] text, int off, int len) {
633       if(text == null)
634         throw new NullPointerException();
635       return false;
636     }
637 
638     @Override
639     public boolean containsKey(CharSequence cs) {
640       if(cs == null)
641         throw new NullPointerException();
642       return false;
643     }
644 
645     @Override
646     public boolean containsKey(Object o) {
647       if(o == null)
648         throw new NullPointerException();
649       return false;
650     }
651     
652     @Override
653     public V get(char[] text, int off, int len) {
654       if(text == null)
655         throw new NullPointerException();
656       return null;
657     }
658 
659     @Override
660     public V get(CharSequence cs) {
661       if(cs == null)
662         throw new NullPointerException();
663       return null;
664     }
665 
666     @Override
667     public V get(Object o) {
668       if(o == null)
669         throw new NullPointerException();
670       return null;
671     }
672   }
673 }